{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize MLeap libraries before Scikit/Pandas\n",
    "import mleap.sklearn.preprocessing.data\n",
    "import mleap.sklearn.pipeline\n",
    "from mleap.sklearn.preprocessing.data import FeatureExtractor\n",
    "\n",
    "# Import Scikit Transformer(s)\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a pandas DataFrame of random values.\n",
    "# A dedicated, seeded RandomState is used so that Restart & Run All\n",
    "# reproduces the same data without touching NumPy's global random state.\n",
    "rng = np.random.RandomState(42)\n",
    "df = pd.DataFrame(rng.randn(10, 5), columns=['a', 'b', 'c', 'd', 'e'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns to run the Standard Scaler against; the FeatureExtractor\n",
    "# sub-selects only these features\n",
    "input_features = ['a', 'c', 'd']\n",
    "output_features = [\"{}_scaled\".format(x) for x in input_features]\n",
    "output_vector_name = 'unscaled_continuous_features' # Used only for serialization purposes\n",
    "\n",
    "feature_extractor_tf = FeatureExtractor(\n",
    "    input_scalars=input_features,\n",
    "    output_vector=output_vector_name,\n",
    "    output_vector_items=output_features\n",
    ")\n",
    "\n",
    "# The Standard Scaler itself is configured exactly as it normally would be\n",
    "standard_scaler_tf = StandardScaler(with_mean=True, with_std=True)\n",
    "\n",
    "# Run ML-Init to add the required attributes to the transformer object;\n",
    "# Op and Name will be initialized automatically\n",
    "standard_scaler_tf.mlinit(\n",
    "    prior_tf=feature_extractor_tf,\n",
    "    output_features='scaled_continuous_features'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assemble a small two-step pipeline: the Feature Extractor followed by the Standard Scaler,\n",
    "# each step keyed by the transformer's own name\n",
    "pipeline_steps = [\n",
    "    (feature_extractor_tf.name, feature_extractor_tf),\n",
    "    (standard_scaler_tf.name, standard_scaler_tf)\n",
    "]\n",
    "standard_scaler_pipeline = Pipeline(pipeline_steps)\n",
    "standard_scaler_pipeline.mlinit()\n",
    "\n",
    "# Fit the pipeline on our test DataFrame and transform it in one call\n",
    "transformed = standard_scaler_pipeline.fit_transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize the fitted pipeline to a bundle; the two positional arguments\n",
    "# are named here so the call reads on its own\n",
    "bundle_dir = '/tmp'\n",
    "bundle_name = 'mleap-scikit-test-pipeline'\n",
    "standard_scaler_pipeline.serialize_to_bundle(bundle_dir, bundle_name, init=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.09029437,  1.24322618,  0.09202772],\n",
       "       [ 0.35401194,  0.041046  ,  1.24484921],\n",
       "       [-0.48500233,  0.30570581, -1.17262211],\n",
       "       [ 1.05378137, -2.24270705,  0.35969212],\n",
       "       [ 1.25162721, -0.44636438,  0.97205378],\n",
       "       [ 0.41815318,  1.41509257, -0.75781534],\n",
       "       [-1.47095173,  0.24305907,  0.47901965],\n",
       "       [ 1.34153948, -0.68338176, -1.34602841],\n",
       "       [-1.20575338, -0.48280075,  1.36224404],\n",
       "       [-1.16711136,  0.6071243 , -1.23342065]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the pipeline output: a NumPy array (not a DataFrame) with one\n",
    "# column per feature selected by the FeatureExtractor\n",
    "transformed"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
